# Building Census Tract to Region Crosswalk
library(pacman)
p_load(
  here, fst, data.table, readxl, janitor 
)

# Loading zip code to census tract crosswalk
tract_zip_dt = read_xlsx(
  here("Data/zip-tract-xwalks/TRACT_ZIP_122019.xlsx")
) |> data.table() |> clean_names()

# Loading Power Profiler Zip to eGRID subregion crosswalk 
zip_egrid_dt = read_xlsx(
  here('Data/electricity-generation/egrid/power_profiler_zipcode_tool_2019.xlsx'),
  sheet = 'Zip-subregion'
) |> data.table() |> clean_names() 

# Merging Tracts x Zip to Zip x eGRID Subregion
tract_egrid_dt = 
  merge(
    tract_zip_dt, 
    zip_egrid_dt,
    by.x = 'zip',
    by.y = 'zip_character'
  ) %>% 
  .[,.(
    res_tot = sum(res_ratio, na.rm =TRUE)),
    by = .(tract, egrid_subregion = e_grid_subregion_number_1)
  ] %>% # Picking egrid subregion with most residences
  .[,.SD[which.max(res_tot)], keyby = tract]

# Table matching egrid subregions to NERC regions 
egrid_nerc_dt = 
  read.fst(
    here("Data/electricity-generation/unit-info-dt.fst"), 
    as.data.table = T
  )[!is.na(egrid_subregion),
    .N,
    keyby = .(egrid_subregion, nerc_adj)
  ][, # Picking nerc_adj with most units in it for each subregion
    .SD[which.max(N)], 
    keyby = egrid_subregion
  ]

# Merging tracts to NERC regions!
# Note that this excludes tracts in AK/HI/PR
tract_nerc_dt = 
  merge(
    tract_egrid_dt,
    egrid_nerc_dt,
    by = 'egrid_subregion'
  )[,.(tract, nerc_adj)]

# Saving the results 
write.csv(
  tract_nerc_dt, 
  here('Data/electricity-generation/output/tract-nerc.csv')
)

# Tract map of different NERC regions 
p_load(tigris, sf, dplyr, purrr, stringr, haven, ggplot2)
options(tigris_use_cache = TRUE)

# Predictions from structural code
merged_dt = 
  cbind(
    fread(here("Data/CleanedData/MergedData.csv")),
    read_dta(here("Data/CleanedData/BI_tCollPercPolnm0Base.dta")),
    read_dta(here("Data/CleanedData/HH_tract.dta"))
  ) |>
  merge(
    fread(here("Data/census-regions.csv")),
    by.x = "state", 
    by.y = "State Code",
    all.x = T
  )

# Now getting shape files
states_sf = 
  states(year = 2000, cb = TRUE) |>
  filter( # Limiting to continental US
    !(STATE %in% c("02","15","60","66","69","72","78"))
  ) |>
  clean_names()

tract_sf =
  map_dfr(
    merged_dt$state_fips |> unique() |> str_pad(width = 2, side = "left", pad = "0"),
    \(st){tracts(cb = TRUE, year = 2015, state = st)}
  ) |> clean_names()

tract_nerc_sf = 
  left_join(
    tract_sf,
    tract_nerc_dt,
    by = c('geoid'='tract')
  ) |>
  st_transform(crs = 2163)

# Filling in missing values by finding closest tract 
tract_nerc_na_sf = filter(tract_nerc_sf, is.na(nerc_adj))
tract_nerc_not_na_sf = filter(tract_nerc_sf, !is.na(nerc_adj))
# Finding closest nerc_adj
closest_dt = 
  map_dfr(
    seq_len(nrow(tract_nerc_na_sf)),
    \(i){
      data.table(
        geoid = tract_nerc_na_sf[i,]$geoid, 
        nerc_adj_closest = tract_nerc_not_na_sf[
          which.min(st_distance(tract_nerc_not_na_sf, tract_nerc_na_sf[i,])),
        ]$nerc_adj
      )
    }
  )

# Adding back into the data 
tract_nerc_sf = 
  left_join(
    tract_nerc_sf, 
    closest_dt, 
    by = 'geoid'
  ) |>
  mutate(
    nerc_adj = ifelse(
      is.na(nerc_adj), nerc_adj_closest,
      nerc_adj
    ) |> factor(levels = c(
      'CAL','WECC','TRE','MRO','NPCC','RFC','SERC'
    ))
  ) |>
  select(-nerc_adj_closest)

# Making the map

theme_set(theme_void(base_size = 14)+
            theme(legend.position = 'bottom'))
nerc_adj_map = 
  ggplot() +
  geom_sf(
    data = tract_nerc_sf |> filter(!is.na(nerc_adj)), 
    aes(fill = nerc_adj,color = nerc_adj), 
    lwd = 0.1
  ) + 
  geom_sf(data = states_sf, fill = NA, color = 'black') +
  scale_fill_brewer(palette = 'Dark2', name = "Region", aesthetics = c('fill','color')) 
ggsave(
  plot = nerc_adj_map,
  filename = here('figures/maps/nerc-adj-map.jpeg'),
  width = 10, height = 7,
  bg = 'white'
)


# Collecting units with missing nerc region 
unit_info_sf = 
  read.fst(
    here("Data/electricity-generation/unit-info-dt.fst"), 
    as.data.table = T
  )[is.na(nerc_adj) & !is.na(lat)] |>
  st_as_sf(
    coords = c('lon','lat'),
    crs = 4326
  )
ggplot() +
  geom_sf(data = tract_sf, aes(fill = nerc_adj,color = nerc_adj)) + 
  geom_sf(data = states_sf, fill = NA, color = 'black', size = 0.35) +
  geom_sf(data = unit_info_sf, color = 'red') +
  scale_fill_brewer(palette = 'Dark2', name = "NERC") +
  scale_color_brewer(palette = 'Dark2', name = "NERC") +
  theme_minimal() 




